Dataset & users demographics

Loading data

load(file = paste0(IO$output_data, "users.Rdata"), verbose = TRUE)
## Loading objects:
##   users
load(file = paste0(IO$output_data, "cycles.Rdata"), verbose = TRUE)
## Loading objects:
##   cycles

Table 1

# average app usage duration
agg_start = aggregate(cycle_start ~ user_id, cycles, min)
colnames(agg_start) = c("user_id", "start_first_cycle")
agg_end = aggregate(cycle_start+cycle_length ~ user_id, cycles, max)
colnames(agg_end) = c("user_id", "end_last_cycle")
agg = merge(agg_start, agg_end)
m = match(users$user_id, agg$user_id)
agg = agg[m,]
agg$app_duration_in_days = as.numeric(agg$end_last_cycle - agg$start_first_cycle)
agg$app_duration_in_years = agg$app_duration_in_days / 365
ju = which(users$BC != "unclear")
jc = which(cycles$BC != "unclear")

# Number of users
df = data.frame(field = "Number of users",original_dataset = nrow(users), filtered_dataset  = sum(users$BC != "unclear"))

# Number of cycles
df = rbind(df,
           data.frame(field = "Number of cycles",original_dataset = nrow(cycles), filtered_dataset  = sum(cycles$BC != "unclear")))

# Number of breast tenderness symptoms
df = rbind(df,
           data.frame(field = "Number of breast tenderness symptoms",original_dataset = sum(cycles$n_TB), filtered_dataset  = sum(cycles$n_TB[jc])))



# average app usage duration
df = rbind(df,
           data.frame(field = "Average app usage duration (in year)",
                      original_dataset = round(mean(agg$app_duration_in_years),2), 
                      filtered_dataset  = round(mean(agg$app_duration_in_years[ju]),2)))
df = rbind(df,
           data.frame(field = "Average app usage duration (sd)",
                      original_dataset = round(sd(agg$app_duration_in_years),2), 
                      filtered_dataset  = round(sd(agg$app_duration_in_years[ju]),2)))


# average app number of cycles
df = rbind(df,
           data.frame(field = "Average # of cycles per users",
                      original_dataset = round(mean(users$n_cycles),2), 
                      filtered_dataset  = round(mean(users$n_cycles[ju]),2)))
df = rbind(df,
           data.frame(field = "Average # of cycles (sd)",
                      original_dataset = round(sd(users$n_cycles),2), 
                      filtered_dataset  = round(sd(users$n_cycles[ju]),2)))

# average  number of breast tenderness per users
df = rbind(df,
           data.frame(field = "Average # of breast tenderness per users",
                      original_dataset = round(mean(users$n_tender_breasts),2), 
                      filtered_dataset  = round(mean(users$n_tender_breasts[ju]),2)))

df = rbind(df,
           data.frame(field = "Median # of breast tenderness",
                      original_dataset = round(median(users$n_tender_breasts),2), 
                      filtered_dataset  = round(median(users$n_tender_breasts[ju]),2)))

df = rbind(df,
           data.frame(field = "Average # of breast tenderness (sd)",
                      original_dataset = round(sd(users$n_tender_breasts),2), 
                      filtered_dataset  = round(sd(users$n_tender_breasts[ju]),2)))

# average number of breast tenderness per cycles
df = rbind(df,
           data.frame(field = "Average # of breast tenderness per cycle",
                      original_dataset = round(mean(cycles$n_TB),2), 
                      filtered_dataset  = round(mean(cycles$n_TB[jc]),2)))

df = rbind(df,
           data.frame(field = "Average # of breast tenderness per cycle (sd) ",
                      original_dataset = round(sd(cycles$n_TB),2), 
                      filtered_dataset  = round(sd(cycles$n_TB[jc]),2)))


# percentage of cycles per BC (original)
table_BC_CLUE_o = table(cycles$birth_control_CLUE)
table_BC_CLUE_o_perc = round(100*table_BC_CLUE_o/nrow(cycles),2)

table_BC_CLUE_f = table(cycles$birth_control_CLUE[jc])
table_BC_CLUE_f_perc = round(100*table_BC_CLUE_f/length(jc),2)

df = rbind(df,
           data.frame(field = paste0("% of cycles per BC as logged by the users (",names(table_BC_CLUE_o_perc),")"),
                      original_dataset = as.numeric(table_BC_CLUE_o_perc), 
                      filtered_dataset  = as.numeric(table_BC_CLUE_f_perc)))

# percentage of cycles per BC (re-assigned)

table_BC_o = table(cycles$BC)
table_BC_o_perc = round(100*table_BC_o/nrow(cycles),2)

table_BC_f = table(cycles$BC[jc])
table_BC_f_perc = round(100*table_BC_f/length(jc),2)

df = rbind(df,
           data.frame(field = paste0("% of cycles per BC as re-assigned (",names(table_BC_o_perc),")"),
                      original_dataset = as.numeric(table_BC_o_perc), 
                      filtered_dataset  = c(as.numeric(table_BC_f_perc),0)))



df
##                                                         field
## 1                                             Number of users
## 2                                            Number of cycles
## 3                        Number of breast tenderness symptoms
## 4                        Average app usage duration (in year)
## 5                             Average app usage duration (sd)
## 6                               Average # of cycles per users
## 7                                    Average # of cycles (sd)
## 8                    Average # of breast tenderness per users
## 9                               Median # of breast tenderness
## 10                        Average # of breast tenderness (sd)
## 11                   Average # of breast tenderness per cycle
## 12             Average # of breast tenderness per cycle (sd) 
## 13 % of cycles per BC as logged by the users (none / condoms)
## 14           % of cycles per BC as logged by the users (pill)
## 15  % of cycles per BC as logged by the users (did not enter)
## 16         % of cycles per BC as re-assigned (none / condoms)
## 17                   % of cycles per BC as re-assigned (pill)
## 18                % of cycles per BC as re-assigned (unclear)
##    original_dataset filtered_dataset
## 1         216484.00        210529.00
## 2        3686977.00       3056348.00
## 3        4530662.00       4132033.00
## 4              1.54             1.55
## 5              0.65             0.65
## 6             17.03            17.15
## 7              7.45             7.48
## 8             20.93            21.27
## 9              8.00             8.00
## 10            36.06            36.43
## 11             1.23             1.35
## 12             2.94             3.12
## 13            28.42            31.50
## 14            39.08            39.29
## 15            32.50            29.21
## 16            48.94            59.04
## 17            33.95            40.96
## 18            17.10             0.00
write.csv(df, file = paste0(IO$tables,"Table_1.csv") , row.names = FALSE)

Supplementary Table 1

table_n = table(cycles$birth_control_CLUE, cycles$BC)
table_perc = round( 100*table_n/apply(table_n, 1, sum) , 2)

rownames(table_n) = paste0(rownames(table_n), ": n cycles")
table_n
##                           
##                            none / condoms   pill unclear
##   none / condoms: n cycles         895215  67507   85295
##   pill: n cycles                   215958 984776  240084
##   did not enter: n cycles          693365 199527  305250
rownames(table_perc) = paste0(rownames(table_perc), ": % cycles")
table_perc
##                           
##                            none / condoms  pill unclear
##   none / condoms: % cycles          85.42  6.44    8.14
##   pill: % cycles                    14.99 68.35   16.66
##   did not enter: % cycles           57.87 16.65   25.48
table_BC = rbind(table_n,table_perc)
o = order(rownames(table_BC), decreasing = TRUE)
table_BC = table_BC[o,]
table_BC
##                          none / condoms      pill   unclear
## pill: n cycles                215958.00 984776.00 240084.00
## pill: % cycles                    14.99     68.35     16.66
## none / condoms: n cycles      895215.00  67507.00  85295.00
## none / condoms: % cycles          85.42      6.44      8.14
## did not enter: n cycles       693365.00 199527.00 305250.00
## did not enter: % cycles           57.87     16.65     25.48
write.csv(table_BC, file = paste0(IO$tables, "Supplementary_Table_BC_table.csv"))

Table 2: users’ features

mean(users$age)
## [1] 20.59862
sd(users$age)
## [1] 4.946749
mean(users$age[ju])
## [1] 20.61776
sd(users$age[ju])
## [1] 4.953888
round(100*table(users$age_cat)/nrow(users),2)
## 
## (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] 
##    9.14   51.75   23.20   10.42    4.34    1.05    0.11
round(100*table(users$age_cat[ju])/length(ju),2)
## 
## (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] 
##    9.10   51.61   23.29   10.47    4.36    1.06    0.11
lu(users$country)
## [1] 24
lu(users$country[ju])
## [1] 24
round(100*sort(table(users$country))/nrow(users),2)
## 
##      Venezuela        Ecuador        Ireland        Belgium         Sweden 
##           0.04           0.05           0.05           0.11           0.16 
##           Peru    Switzerland        Austria       Portugal         Russia 
##           0.17           0.19           0.24           0.52           0.74 
##        Denmark          Chile      Australia      Argentina       Colombia 
##           0.75           0.93           1.56           1.58           1.64 
##          Italy          Spain         Canada        Germany         France 
##           2.32           2.84           3.14           7.29           8.01 
##         Mexico United Kingdom         Brazil  United States 
##           8.37           9.12          20.49          29.71
round(100*sort(table(users$country[ju]))/length(ju),2)
## 
##      Venezuela        Ireland        Ecuador        Belgium         Sweden 
##           0.04           0.05           0.05           0.11           0.16 
##           Peru    Switzerland        Austria       Portugal         Russia 
##           0.17           0.19           0.24           0.53           0.74 
##        Denmark          Chile      Australia      Argentina       Colombia 
##           0.75           0.93           1.53           1.58           1.62 
##          Italy          Spain         Canada        Germany         France 
##           2.34           2.85           3.12           7.30           8.01 
##         Mexico United Kingdom         Brazil  United States 
##           8.42           9.05          20.48          29.74
mean(users$height, na.rm = TRUE)
## [1] 163.4378
sd(users$height, na.rm = TRUE)
## [1] 6.965825
mean(users$height[ju], na.rm = TRUE)
## [1] 163.4301
sd(users$height[ju], na.rm = TRUE)
## [1] 6.964167
mean(users$weight, na.rm = TRUE)
## [1] 61.05458
sd(users$weight, na.rm = TRUE)
## [1] 15.13491
mean(users$weight[ju], na.rm = TRUE)
## [1] 61.02502
sd(users$weight[ju], na.rm = TRUE)
## [1] 15.08613
mean(users$bmi, na.rm = TRUE)
## [1] 22.85278
sd(users$bmi, na.rm = TRUE)
## [1] 5.304415
mean(users$bmi[ju], na.rm = TRUE)
## [1] 22.84302
sd(users$bmi[ju], na.rm = TRUE)
## [1] 5.274114
round(100*sort(table(users$birth_control_o))/nrow(users),2)
## 
##       condoms          none          pill did not enter 
##         10.12         18.25         35.67         35.96
round(100*sort(table(users$birth_control_o[ju]))/length(ju),2)
## 
##       condoms          none did not enter          pill 
##         10.31         18.59         35.31         35.79
round(100*sort(table(users$BC))/nrow(users),2)
## 
##        unclear           both           pill none / condoms 
##           2.75          21.07          26.03          50.15
round(100*sort(table(users$BC[ju]))/length(ju),2)
## 
##           both           pill none / condoms 
##          21.67          26.76          51.57

Demographics

g = ggplot(users, aes(x = age, fill = birth_control))
g + geom_histogram(binwidth = 1) + facet_grid(birth_control ~.)

g = ggplot(users, aes(x = age, fill = country))
g + geom_histogram(binwidth = 1) + facet_wrap(country ~.)

load(file = paste0(IO$out_Rdata, "number_of_users_and_cycles_per_category.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
g = ggplot(agg, aes(y = BC, x= 0,   col = BC)) + 
  facet_grid(bmi_cat ~ age_cat) + 
  geom_text(aes(label = n_users)) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.ticks = element_blank(),
        axis.text.x = element_blank()) + 
  guides(color = FALSE, size = FALSE) + xlab("") + ylab("") + 
  ggtitle("Number of users per category of age (horizontal) and bmi (vertical)")

g
## Warning: Removed 16 rows containing missing values (geom_text).

ggsave(filename = paste0(IO$panels, "S1_X_number_of_users_per_cat.pdf"), plot = g)
## Saving 7 x 5 in image
## Warning: Removed 16 rows containing missing values (geom_text).
g = ggplot(agg, aes(y = BC, x= 0,   col = BC)) + 
  facet_grid(bmi_cat ~ age_cat) + 
  geom_text(aes(label = n_cycles)) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.ticks = element_blank(),
        axis.text.x = element_blank()) + 
  guides(color = FALSE, size = FALSE) + xlab("") + ylab("") +
  ggtitle("Number of cycles per category of age (horizontal) and bmi (vertical)")
g
## Warning: Removed 51 rows containing missing values (geom_text).

ggsave(filename = paste0(IO$panels, "S1_X_number_of_cycles_per_cat.pdf"), plot = g)
## Saving 7 x 5 in image
## Warning: Removed 51 rows containing missing values (geom_text).

Figure 1: Examples and Aggregated symptoms

Figure 1: Number of Breast Tenderness per users

ju = which(users$BC %in% as.character(par$BC_dict$name))
g = ggplot(users[ju,], aes(x = n_tender_breasts, fill = BC))+
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5)+xlim(c(-1,20))
g
## Warning: Removed 49439 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).

j = which((users$BC %in% as.character(par$BC_dict$name)) & 
            (users$perc_cycles_with_TB>=0) & 
            (users$perc_cycles_with_TB<=1))

g = ggplot(users[j,], aes(x = perc_cycles_with_TB, fill = BC))+
  geom_histogram(binwidth = 0.1, position = "identity", alpha = 0.5)
g

table_BC_BT = table(users$BC[j], round(users$perc_cycles_with_TB[j],1))
table_BC_BT
##                 
##                      0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8
##   none / condoms 15508 10789 11093  8348  8965  8314  8283  7290  9043
##   pill           23834 10241  6815  3493  2846  2012  1648  1133  1364
##                 
##                    0.9     1
##   none / condoms  7646  6801
##   pill             809   859
table_BC_BT_perc = round(100*table_BC_BT/apply(table_BC_BT,1,sum),2)
table_BC_BT_perc
##                 
##                      0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8
##   none / condoms 15.19 10.57 10.87  8.18  8.78  8.14  8.11  7.14  8.86
##   pill           43.29 18.60 12.38  6.34  5.17  3.65  2.99  2.06  2.48
##                 
##                    0.9     1
##   none / condoms  7.49  6.66
##   pill            1.47  1.56
table_BC_BT_perc_df = as.data.frame(table_BC_BT_perc)
colnames(table_BC_BT_perc_df) = c("BC","perc_cycles_with_TB","perc_users")


g = ggplot(table_BC_BT_perc_df, aes(x = perc_cycles_with_TB,y = perc_users, fill = BC))+
  geom_bar(stat = "identity",position = "dodge", alpha = 1)+
  xlab("Fraction of cycles with at least one reported breast tenderness")+
  ylab("% of users")+
  geom_text(data = data.frame(BC = names(table(users$BC[j])),n_users = as.numeric(table(users$BC[j]))),
            aes(col = BC, label = paste("tot # users:",n_users), 
                x = 8,y = 0.6*max(table_BC_BT_perc_df$perc_users)+6*as.numeric(BC)))+
  scale_fill_manual(values = cols$BC)
g

But the pill users tend to only track pill and period:

users$f_other_logs = pmax(0,users$n_other_logs/users$n_logs)

f_other_logs_threshold = 10/100

ggplot(users[j,], aes(x = f_other_logs, fill = BC ))+
  geom_histogram(position = "identity", alpha = 0.3)+
  geom_vline(xintercept = f_other_logs_threshold, linetype = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

t = table(users$BC[j], users$f_other_logs[j] >= f_other_logs_threshold)
100*t/apply(t,1,sum)
##                 
##                       FALSE       TRUE
##   none / condoms  0.6886755 99.3113245
##   pill           22.1622800 77.8377200
tracking_types = c("tracking period (& pill) only","tracking other features")
users$f_other_logs_binary = tracking_types[(users$f_other_logs >= f_other_logs_threshold)+1]
g = ggplot(users[j,], aes(x = perc_cycles_with_TB, fill = BC))+
  geom_histogram(binwidth = 0.1, position = "identity", alpha = 0.5)+
  facet_grid(f_other_logs_binary ~ . )
g

agg = aggregate( user_id ~ BC + f_other_logs_binary + round(users$perc_cycles_with_TB[j],1), users[j,], lu)
colnames(agg) = c("BC","tracking_type","perc_cycles_with_TB","n_users")

agg_tot = aggregate(n_users ~ BC,agg, sum)
agg$tot_n_users_per_BC = agg_tot$n_users[match(agg$BC, agg_tot$BC)]
agg$perc_users = 100*agg$n_users/agg$tot_n_users_per_BC

agg$tracking_type = factor(agg$tracking_type, levels = tracking_types)



g = ggplot(agg, aes(x = BC,y = perc_users, fill = BC))+
  geom_bar(aes(alpha = tracking_type),stat = "identity",position = "stack")+
  xlab("Fraction of cycles with at least 1 breast tenderness")+
  ylab("% of users")+
  scale_fill_manual(values = cols$BC)+
  scale_alpha_discrete(name = "tracking type", range = c(0.5,1))+
  facet_grid(. ~ perc_cycles_with_TB, switch = "x")+
  scale_x_discrete(breaks = NULL)+
  guides(fill = guide_legend(order=1),
         alpha = guide_legend(order=2),
         col = FALSE)
## Warning: Using alpha for a discrete variable is not advised.
g

ggsave(file = paste0(IO$panels, "perc of users vs perc of cycles with at least one BT.pdf"),
       plot = g, 
       width = viz$full_width/2,
       height = 0.4*viz$full_width/2,
       scale = viz$scale*0.85)


agg_tot$labels = paste0("tot # users: ",agg_tot$n_users)
g_n_users = ggplot(agg_tot, aes(col = BC))+
  geom_text(aes(label = labels), x = 1, y = 1)+
  xlim(c(0,3))+ylim(c(0,2))+
  facet_grid(BC ~ .)+
  scale_color_manual(values = cols$BC)+
  guides(col = FALSE)+
  theme(strip.text = element_blank(),
        panel.grid = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank() )
g_n_users

ggsave(file = paste0(IO$panels, "tot number of users for perc of users vs perc of cycles with at least one BT.pdf"),
       plot = g_n_users, 
       width = viz$full_width/6,
       height = viz$full_width/20,
       scale = viz$scale)

Figure 1: Examples

#load(paste0(IO$input_data,"days/days_1.Rdata"), verbose = TRUE)

load(file = paste0(IO$out_Rdata,"sub_days_examples_input_data.Rdata"), verbose = TRUE)
## Loading objects:
##   sub_days
days = sub_days

user_ids = unique(days$user_id)

for(user_id in user_ids){
  j = which((days$user_id == user_id)&(!is.na(days$cycle_id_m)) & (days$category != "pill_hbc") & (days$cycle_nb_m %in% 4:8))
  d = days[j,]
  g = ggplot_user_example(d = d, title = FALSE, size_factor = 1.5)
  print(g)
  
  ggsave(filename = paste0(IO$panels, "example_",user_id,".pdf"), plot = g, width = viz$full_width/2.8, height = viz$full_width/4)
}
## Warning: Removed 78 rows containing missing values (geom_point).

## Warning: Removed 78 rows containing missing values (geom_point).

## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).

## Warning: Removed 16 rows containing missing values (geom_point).

## Warning: Removed 16 rows containing missing values (geom_point).

## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).

## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_point).

## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).

## Warning: Removed 19 rows containing missing values (geom_point).
## Warning: Removed 19 rows containing missing values (geom_point).

rm(days, user_id, user_ids, g, d, j)

Figure 1: Tracking groups

load(file = paste0(IO$out_Rdata,"tracking_behavior_overal_pattern_by_tracking_group_and_birth_control.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
unique(agg[,c("BC","tracking_group","tot_n_cycles")])
##                 BC         tracking_group tot_n_cycles
## 1   none / condoms     menstrual tracking       368956
## 27            pill     menstrual tracking       220308
## 53  none / condoms pre-menstrual tracking       254472
## 79            pill pre-menstrual tracking        37063
## 105 none / condoms       regular tracking       459701
## 131           pill       regular tracking       700152
## 157 none / condoms      sporadic tracking       684557
## 183           pill      sporadic tracking       263187
agg$SE = agg$fraction_cycles_with_logs_SE

g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_logs, col = BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_ribbon(aes(ymin = fraction_cycles_with_logs-1.96*SE, ymax = fraction_cycles_with_logs+1.96*SE, fill = BC),
              alpha = 0.5, col = NA)+
  geom_line() + geom_point()+ 
  scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$BC , name="BC")+
  scale_fill_manual(values= cols$BC , name="BC")+
  guides(col = FALSE, fill = FALSE, shape = FALSE)+
  xlab("cycleday from 1st day of menstruation") + ylab("% cycles with any log")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
  facet_grid( . ~ tracking_group)
g

ggsave(file = paste0(IO$panels, "tracking_profile_perc_cycles_with_logs_per_tracking_group_and_BC.pdf"),
       plot = g, 
       width = viz$full_width/2,
       height = 0.3*viz$full_width/2,
       scale = viz$scale*1.2)


g_n = ggplot(agg, aes(x = cycleday_m_D, y = avg_n_logs, col = BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_line() + geom_point()+ 
  scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$BC , name="BC")+
  scale_fill_manual(values= cols$BC , name="BC")+
  guides()+
  xlab("cycleday from 1st day of menstruation") + ylab("average # of logs")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
  facet_grid( . ~ tracking_group)+
  geom_text(aes(x = -par$D, y = 9.25+ 1*(match(BC, par$BC_dict$name)-1), label = paste0("n: ",tot_n_cycles)),
            hjust = 0, vjust = 0,show.legend = FALSE)
g_n

ggsave(file = paste0(IO$panels, "tracking_profile_avg_log_per_tracking_group_and_BC.pdf"),
       plot = g_n, 
       width = viz$full_width/2,
       height = 0.4*viz$full_width/2,
       scale = viz$scale*1.2)

Figure 1: Overall tender breasts profiles per tracking groups

load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_tracking_group_and_birth_control.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
unique(agg[,c("BC","tracking_group","tot_n_cycles")])
##                 BC         tracking_group tot_n_cycles
## 1   none / condoms     menstrual tracking       368956
## 27            pill     menstrual tracking       220308
## 53  none / condoms pre-menstrual tracking       254472
## 79            pill pre-menstrual tracking        37063
## 105 none / condoms       regular tracking       459701
## 131           pill       regular tracking       700152
## 157 none / condoms      sporadic tracking       684557
## 183           pill      sporadic tracking       263187
agg$SE = agg$fraction_cycles_with_TB_SE

g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = BC), alpha = 0.5, col = NA)+
  geom_line() + geom_point()+ 
  scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$BC , name="BC")+
  scale_fill_manual(values= cols$BC , name="BC")+
  guides(col = FALSE, fill = FALSE, shape = FALSE)+
  xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported BT")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
  facet_grid( . ~ tracking_group)+
  geom_text(aes(x = -par$D, y = 0.27+ 0.03*(match(BC, par$BC_dict$name)-1), label = paste0("n: ",tot_n_cycles)),
            hjust = 0, vjust = 1)
g

ggsave(file = paste0(IO$panels, "symptoms_profile_per_tracking_group_and_BC.pdf"),
       plot = g, 
       width = viz$full_width/2,
       height = 0.3*viz$full_width/2,
       scale = viz$scale*1.2)

Figure 1: Overall tender breasts profiles per age categories and BC for cycles with regular tracking

load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_age_cat_and_birth_control_for_regular_tracking.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
agg$age_x_BC = interaction(agg$age_cat , agg$BC)
agg$SE = agg$fraction_cycles_with_TB_SE


j = which(agg$age_cat %in% par$age_cat_exclude)
if(length(j)>0){agg = agg[-j,]}
j = which((agg$age_cat == "(35,40]") & (agg$BC == "pill"))
if(length(j)>0){agg = agg[-j,]}

agg = agg[which(agg$tracking_group == "regular tracking"),]

g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = age_x_BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = age_x_BC), alpha = 0.5, col = NA)+
  geom_line() + geom_point()+ 
  scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$age_x_BC , name="Age group x BC")+
  scale_fill_manual(values= cols$age_x_BC , name="Age group x BC")+
  guides(col = FALSE, fill = FALSE, shape = FALSE)+
  xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported BT")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))#+
#facet_grid(. ~ tracking_group)
g

ggsave(filename = paste0(IO$panels, "symptoms_profile_by_BC_and_age_cat_for_regular_tracking.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.5*viz$full_width/2,
       scale = viz$scale)
load(file = paste0(IO$out_Rdata, "avg_symptoms_per_user.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
g = ggplot(agg[agg$BC %in% c("pill","none / condoms"),], aes(x = n_TB_by_n_cycles, y = perc_users, fill = BC)) +
  geom_bar(stat = "identity", position = "dodge" )+
  scale_fill_manual(values = cols$BC)+
  xlab("# of reported BT symptoms / # of cycles")+ 
  ylab("% of users") + guides(fill = FALSE)
g

ggsave(filename = paste0(IO$panels, "1_avg_TB_per_user.pdf"), plot = g, width = viz$full_width/4.5, height = viz$full_width/4.5, scale = viz$scale)
load(file = paste0(IO$out_Rdata, "n_TB_per_cycle.Rdata"), verbose=TRUE)
## Loading objects:
##   agg
g = ggplot(agg[agg$BC != "unclear",], aes(x = tender_breasts_ul, y = perc_cycles, fill = BC)) +
  geom_bar(stat = "identity", position = "dodge") + 
  scale_fill_manual(values = cols$BC3) + guides(fill = FALSE)+
  xlab("# of reported Tender Breast symptoms per cycle")+ ylab("% of cycles")

g

ggsave(filename = paste0(IO$panels, "1_number_of_TB_per_cycle.pdf"), plot = g, width = viz$full_width/4, height = viz$full_width/4.5, scale = viz$scale)

Figure S2: Predictor of Breast Tenderness.

load(file = paste0(IO$out_Rdata, "symptoms_predictors.Rdata"), verbose=TRUE)
## Loading objects:
##   pred
g_z = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(input ~ .)+
  guides(col = FALSE)
g_z

g_z = g_z + guides(linetype = FALSE)


g_p = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(input ~ .)+
  guides(col = FALSE)
g_p

g_p = g_p + guides(linetype = FALSE)



g_z_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(country_name ~ .)+
  guides(col = FALSE)
g_z_countries

g_z_countries = g_z_countries + guides(linetype = FALSE)


g_p_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(country_name ~ .)+
  guides(col = FALSE)
g_p_countries

g_p_countries = g_p_countries + guides(linetype = FALSE)




ggsave(filename = paste0(IO$panels, "predictors_z.pdf"), 
       plot = g_z, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)



ggsave(filename = paste0(IO$panels, "predictors_z_countries.pdf"), 
       plot = g_z_countries, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)


ggsave(filename = paste0(IO$panels, "predictors_p.pdf"), 
       plot = g_p, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)



ggsave(filename = paste0(IO$panels, "predictors_p_countries.pdf"), 
       plot = g_p_countries, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)

Figure 2: Phenotyping Breast Tenderness

Figure 2: Evolution in time

load(file = paste0(IO$out_Rdata,"evolution_of_symptom_tracking_in_time.Rdata"), verbose = TRUE)
## Loading objects:
##   fraction_TB
load(file = paste0(IO$out_Rdata,"evolution_of_symptom_tracking_in_time_min_n_cycles.Rdata"), verbose = TRUE)
## Loading objects:
##   min_n_cycles
fraction_TB$init_TB_group_text = factor(paste0("avg. #BT in cycles 1-5 = ",fraction_TB$init_TB_group))
min_n_cycles$init_TB_group_text = factor(paste0("avg. #BT in cycles 1-5 = ",min_n_cycles$init_TB_group))

fraction_TB$BC_x_init_TB_group =  interaction(fraction_TB$init_TB_group, fraction_TB$BC)
min_n_cycles$BC_x_init_TB_group =  interaction(min_n_cycles$init_TB_group, min_n_cycles$BC)


g = ggplot(fraction_TB, aes(x = cycle_nb_m_rel, y = median_n_TB, col = BC_x_init_TB_group, fill = BC_x_init_TB_group)) + 
  geom_line() + 
  geom_line(aes(y = avg_n_TB), linetype = 3) + 
  geom_ribbon(aes(ymin = q_05_n_TB , ymax = q_95_n_TB), alpha = 0.2, col = NA)+
  geom_ribbon(aes(ymin = q_25_n_TB , ymax = q_75_n_TB), alpha = 0.2, col = NA)+
  geom_text(data = min_n_cycles, aes(label = paste0("min # = ",min_n_cycles)), x = 0.5, y = 0.95*max(fraction_TB$q_95_n_TB), size = 3, vjust = 0, hjust = 0)+
  facet_grid(BC  ~ init_TB_group_text )+
  scale_y_continuous(breaks = seq(0,21, by = 3), minor_breaks = 0:21)+
  scale_color_manual(values = cols$BC_x_init_TB_group) + scale_fill_manual(values = cols$BC_x_init_TB_group)+ 
  guides(col = FALSE, fill = FALSE) +
  xlab("Cycle number") + ylab("# of reported BT")
g

ggsave(filename = paste0(IO$panels, "evolution_in_time.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.5*viz$full_width/2,
       scale = viz$scale)

Figure 2: Duration, Timing, Variability and Cyclicity

i = 2
load(paste0(IO$out_Rdata, "CVDT_histograms_",i,".Rdata"), verbose = TRUE)
## Loading objects:
##   data_agg
data_agg$BC = factor(data_agg$BC, levels = rev(par$BC_dict$name))


g = ggplot(data_agg, aes(x = value, y = perc_stretches, fill = BC))+
  geom_bar(aes(width = by),stat = "identity", position = "identity", alpha = 0.4)+
  #geom_area(position = "identity", alpha = 0.5)+
  facet_wrap("variable", scales = "free")+
  ylab(ifelse(i == 1,"% of stretches of 6 consecutive cycles","% of users"))+xlab("")+
  guides(col = FALSE, fill = FALSE)+
  scale_fill_manual(values = rev(cols$BC))+
  theme(strip.background = element_rect(fill = "gray90", color = NA))
## Warning: Ignoring unknown aesthetics: width
g

ggsave(filename = paste0(IO$panels, "CVDT_",ifelse(i == 1,"stretches","users"),".pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.5*viz$full_width/2,
       scale = viz$scale)
load(file = paste0(IO$out_Rdata,"selected_examples_cvdt.Rdata"), verbose = TRUE)
## Loading objects:
##   df
##   d
##   selected_average_profiles_with_cvdt
##   selected_cvdt_stretches
df$wrapped_names = gsub(", ","\n",df$names)

d$stretch_id = factor(d$stretch_id, levels = df$stretch_id)

m = match(d$stretch_id, df$stretch_id)
d$cat = df$cat[m]
d$wrapped_names = df$wrapped_names[m]
d$wrapped_names = factor(d$wrapped_names, levels = unique(df$wrapped_names))

agg = aggregate(cycle_nb_m ~ stretch_id, d, min)
d$min_cycle_number = agg$cycle_nb_m[match(d$stretch_id, agg$stretch_id)]
d$cycle_nb_m_rel = d$cycle_nb_m - d$min_cycle_number
d$cycle_nb_m_o = d$cycle_nb_m
d$cycle_nb_m = d$cycle_nb_m_rel

g_ex = ggplot_imputed_TB(sel_d = d, facet_grid_y = "wrapped_names", facet_grid_x = "BC", col = "BC", cycle_id = FALSE)+
  scale_color_manual(values = cols$BC)+scale_fill_manual(values = cols$BC)+
  theme(strip.text.y = element_text(angle = 0, hjust = 0))+
  scale_size(range = c(1,3))+
  ylab("")

g_ex

ggsave(filename = paste0(IO$panels, "CVDT_examples.pdf"), 
       plot = g_ex, 
       width = viz$full_width*0.47, 
       height = viz$full_width/2.5,
       scale = viz$scale*1.3)


g_profiles = ggplot(selected_average_profiles_with_cvdt, aes(x = cycleday_m_D, y = average_profile, col = BC, fill = BC) )+
  geom_point()+
  geom_line()+
  geom_area(alpha = 0.3, col = NA)+
  guides(col = FALSE, fill = FALSE)+
  facet_grid(cat ~ BC)+
  scale_color_manual(values = cols$BC)+scale_fill_manual(values = cols$BC)

g_profiles

g_stat= ggplot(df, aes(x = as.factor(cat), y = perc_stretches/100, fill = BC))+
  coord_flip()+
  geom_bar(stat = "identity", position = "dodge")+
  scale_fill_manual(values = cols$BC)+
  guides(fill = FALSE)+
  scale_y_continuous(labels = scales::percent)+
  scale_x_discrete(labels = NULL)+
  ylab("% of stretches")+
  xlab("")
g_stat

g_stat= ggplot(df, aes(x = factor(cat, levels = rev(sort(unique(cat)))), y = perc_users/100, fill = BC))+
  coord_flip()+
  geom_bar(stat = "identity", position = "dodge", width = 0.8)+
  scale_fill_manual(values = cols$BC)+
  guides(fill = FALSE)+
  scale_y_continuous(labels = scales::percent)+
  scale_x_discrete(labels = NULL, breaks = NULL)+
  ylab("% of users")+
  xlab("")
g_stat

ggsave(filename = paste0(IO$panels, "CVDT_stats.pdf"), 
       plot = g_stat, 
       width = viz$full_width*0.5, 
       height = viz$full_width,
       scale = viz$scale*0.5)




cvdt_col = c("duration","timing","variability","cyclicity")
data.frame(selected_cvdt_stretches$stretch_id,round(selected_cvdt_stretches[,cvdt_col],2))
##              selected_cvdt_stretches.stretch_id duration timing
## 1   c352e33efb42cf9ca95dc325956273b6f5c9eb7a_s1     2.81  -6.63
## 2   14c57979ce182fcfa77f4a1668717a6e26cb984a_s1     4.50  -5.56
## 3   cb101d3ed720edafb4fd9ba9f2d240021bcb7226_s4    23.06  -6.24
## 4   af33d45b21b5cb5e49e5d82f31b4b93c0d9d963f_s6    16.29  -7.23
## 5   d3555185f2b97a22e94aa545fbd494caa6201c0b_s2     5.50  -2.73
## 6   2d4d71ed5b6cdc4719c2399e117527722b113179_s4     6.83  -0.63
## 7   04a81d7bf7e9ac1a7ef8a69b1afcac35263dbace_s1     4.33  -1.54
## 8   57945d2b174fc9593f848a2ccfd86d943e4c6577_s1     1.67  -1.40
## 9   f33152aa3cdb104b418fa931e367c73528d902fc_s4     4.90  -3.80
## 10  4f441d56d0540c6d2406a158315cdc0c4c813934_s2    13.25  -4.55
## 11 939ed23499de861e6ad9b8185d86b8f37cd3af3b_s10     1.75  -4.29
## 12  230ceb24fe4a27283898f941da648c90c9b6164d_s1     7.20  -5.28
## 13  c1bb30485db6e05ebe56b1f6cfc68c9c4b5feaa3_s6    19.17  -5.77
## 14  1aaa3fb10f68733ca0d8bca2fdd1ee03e6dbe42f_s2    14.17  -8.06
## 15  9d184b70d132b823801db15906e5f2f772263f00_s1     7.50  -5.18
## 16  39cd0f53570b53f5087312212c3d6a60754d2826_s1     7.00   2.00
## 17  37c55ec2d00e2453189267d3b03bfb76abb6feb1_s2     4.67  -7.14
## 18  096f59b43b8894a2a9ee90aae26701a7a9415fe2_s2     1.80   0.89
## 19  94792dd88d1324fe485b35c3953a1b1db6a654ea_s2     5.33  -8.59
## 20  4748c7f0f819773dfbb38c311f3886800405e2e6_s2     4.40  -6.82
##    variability cyclicity
## 1         0.42      0.00
## 2         0.69      0.00
## 3         0.07      0.20
## 4         0.15      0.73
## 5         0.67      0.80
## 6         0.23      0.79
## 7         0.66      0.85
## 8         0.70      0.79
## 9         1.00      0.00
## 10        0.68      0.46
## 11        0.56      0.41
## 12        0.42      0.00
## 13        0.24      0.00
## 14        0.34      0.63
## 15        0.18      0.84
## 16        0.00      1.00
## 17        0.77      0.78
## 18        0.58      0.70
## 19        0.65      0.00
## 20        0.95      0.34
selected_cvdt_stretches$cat = df$cat[match(selected_cvdt_stretches$stretch_id, df$stretch_id)]
id_vars = c("stretch_id","cycle_nb","BC","user_id","cycle_id_m","sd","s_id","cat")
varying_vars = cvdt_col
df_ex_values = melt(as.data.frame(selected_cvdt_stretches[,c(id_vars,varying_vars)]), id.vars = id_vars)

g_ex_values = ggplot(df_ex_values, aes(col = BC))+
  geom_text(aes(label = round(value,2)), x = 1, y = 1)+
  xlim(c(0,3))+ylim(c(0,2))+
  facet_grid(cat + variable ~ BC)+
  guides(col = FALSE)+
  scale_color_manual(values = cols$BC)+
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_blank(),
        strip.text.x = element_blank(),
        strip.text.y = element_text(angle = 0))
g_ex_values

ggsave(filename = paste0(IO$panels, "CVDT_examples_values.pdf"), 
       plot = g_ex_values, 
       width = viz$full_width*0.25, 
       height = viz$full_width,
       scale = viz$scale*0.8)

Figure 2: Consistency of Symptoms

load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)
## Loading objects:
##   sel_d
sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]


if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)){
  
  g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent, 
                                   facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
  
  
  g_average = ggplot_imputed_TB(sel_d = sel_d_average, 
                                facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
  
  
  
  g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent, 
                                     facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
  
  grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
}

load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)
## Loading objects:
##   sel_d
user_ids = 
  c("586e33d674b46555254e908ec42d6771ed294bcd", "ff484ef33fd2c29086617ed63a9814571b86f15d", 
    "677332825213f1f0cb1e6831ead1fc787cf01663", "8714aad3100409673b8762775e474aff17321df4", 
    "40620830924a2f02935e3a6c6102f84e5de35fce","6ef947aeb9cfe0468bd1efd26b8a10dca0a1e645")

sel_d = sel_d[which(sel_d$user_id %in% user_ids),]

sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]

if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)&(nrow(sel_d_average)>0)){
  
  g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent, 
                                   facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
    scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
  
  g_average = ggplot_imputed_TB(sel_d = sel_d_average, 
                                facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
    scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
  
  
  g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent, 
                                     facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
    scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
  
  
  grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
  g_consistency = arrangeGrob(g_inconsistent, g_average, g_consistent, nrow = 1, widths = c(1,1,1))
  g_consistency
  
  ggsave(filename = paste0(IO$panels, "consistency_examples.pdf"), 
         plot = g_consistency, 
         width = viz$full_width/2, 
         height = 0.33*viz$full_width/2,
         scale = viz$scale*1.7)
  
  
}

load(file = paste0(IO$out_Rdata,"consistency_summary.Rdata"), verbose = TRUE)
## Loading objects:
##   df
df$BC = factor(df$BC, levels = c(as.character(par$BC_dict$name),"both"))
df$BC_x_init_TB_group =  interaction(df$init_TB_group, df$BC)


g = ggplot(df, aes(x = consistency, y = n_users, fill = BC_x_init_TB_group)) + 
  geom_area(position = "identity", alpha = 0.75) + 
  facet_grid(BC ~ . , scale = "free_y")+
  scale_fill_manual(values = cols$BC_x_init_TB_group2)+
  guides(fill = FALSE)
g

ggsave(filename = paste0(IO$panels, "consistency_summary.pdf"), 
       plot = g, 
       width = viz$full_width/2.5, 
       height = 0.6*viz$full_width/2.5,
       scale = viz$scale)

Phenotyping (Dimension Reduction and clustering)

for(bc in c("NC","pill")){
  i = (bc == "pill")+1
  BC = par$BC_dict$name[i]
  load(file = paste0(IO$tmp_data,"user_phenotyping_mds_",bc,".Rdata"), verbose = TRUE)
  load(file = paste0(IO$tmp_data,"user_phenotyping_this_c_wide_",bc,".Rdata"), verbose = TRUE)
  
  
  # screeplot
  
  g = ggplot(eig_df[1:10,], aes(x = as.factor(x), y = eig)) + 
    geom_bar(stat = "identity", fill = cols$BC[i])+
    xlab("dimensions")+ggtitle(BC)
  g
  
  eig_df$var_expl = round(100*eig_df$eig/sum(eig_df$eig),1)
  g = ggplot(eig_df[1:10,], aes(x = as.factor(x), y = var_expl)) + 
    geom_bar(stat = "identity", fill = cols$BC[i])+
    xlab("dimensions")+
    ylab("% of variance explained")+
    ggtitle(BC)
  g
  
  
  ggsave(filename = paste0(IO$panels, "S_screeplot_",bc,".pdf"),
         plot = g,
         width = viz$full_width/4,
         height = 0.75*viz$full_width/4,
         scale = viz$scale)
  
  
  # interactive 3D
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~n_days,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~time,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~max,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~time_last_symptoms,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  # interactive 2D
  
  
  plot_ly(mds_df, x = ~X1, y = ~X2,
          color = ~X3,
          type = "scatter", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  plot_ly(mds_df, x = ~X1, y = ~X3,
          color = ~X2,
          type = "scatter", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  
  
  
  if(bc == "pill"){
    #mds_df$user_id[grep("65035d",mds_df$user_id)]
    user_ids = c("0ceafb77c214fb6e45d069e6c645083a9f7799d0","d0fcb8602f237946a56162f047c8ef3ef254541c",special_pill_user,
                 "73266275a3d6f3898cf58269070edb4d4edd2034","e610274623760e9c4f1a50e0ed5a00f17117ed0b",
                 "65035d05a6ccabaaeb82ecd4e17bde1c096adc2f",
                 "c45cd0e050cf6d704da32cbab899bacf51a519a3")
  }else{
    user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
                 "c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
                 "2eedf35808effb0a40381e11d1c94257096ab8de","cffcd394d8f769d363f67eada7f16b0106c3191a",
                 "b9590a55307d916d4aabc8e386bc76586a28ae0b")
    
    user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
                 "cffcd394d8f769d363f67eada7f16b0106c3191a","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
                 "c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","34c8890de2da03e29a7f93771ea9ecaf12cf4c88",
                 "2eedf35808effb0a40381e11d1c94257096ab8de")
  }
  
  j_mds = which(mds_df$user_id %in% user_ids)
  mds_df_subset = mds_df[j_mds,]
  mds_df_subset$user_id = factor(as.character(mds_df_subset$user_id), levels = user_ids)
  
  # MDS
  g_12 = ggplot(mds_df, aes(x = X1, y = X2)) + coord_fixed()+
    geom_point(alpha = 0.5, size = 0.3, col = "gray40")+
    geom_point(data = mds_df_subset, aes(x = X1, y = X2, col = user_id), size = 2, alpha = 0.7)+
    scale_x_continuous(breaks = seq(-1,1,by = 0.2))+
    scale_y_continuous(breaks = seq(-1,1,by = 0.2))+
    xlab(paste0("X1 (",round(100*eig_df$eig[1]/sum(eig_df$eig),1),"% variance)"))+
    ylab(paste0("X2 (",round(100*eig_df$eig[2]/sum(eig_df$eig),1),"% variance)"))+
    guides(col = FALSE)
  
  g_13 = ggplot(mds_df, aes(x = X1, y = X3)) + coord_fixed()+
    geom_point(alpha = 0.5, size = 0.3, col = "gray40") +
    geom_point(data = mds_df_subset, aes(x = X1, y = X3, col = user_id), size = 2, alpha = 0.7)+
    scale_x_continuous(breaks = seq(-1,1,by = 0.2))+
    scale_y_continuous(breaks = seq(-1,1,by = 0.2))+
    xlab(paste0("X1 (",round(100*eig_df$eig[1]/sum(eig_df$eig),1),"% variance)"))+
    ylab(paste0("X3 (",round(100*eig_df$eig[3]/sum(eig_df$eig),1),"% variance)"))+
    guides(col = FALSE)
  
  grid.arrange(g_12, g_13, nrow = 1)
  g_mds = arrangeGrob(g_12, g_13, nrow = 2)
  g_mds
  
  ggsave(filename = paste0(IO$panels, "mds_",bc,".pdf"), 
         plot = g_mds, 
         width = viz$full_width/4, 
         height = viz$full_width/2.2,
         scale = viz$scale)
  
  
  ggsave(filename = paste0(IO$panels, "mds_",bc,"_12.pdf"), 
         plot = g_12, 
         width = viz$full_width/4, 
         height = viz$full_width/4,
         scale = viz$scale)
  
  ggsave(filename = paste0(IO$panels, "mds_",bc,"_13.pdf"), 
         plot = g_13, 
         width = viz$full_width/4, 
         height = viz$full_width/4,
         scale = viz$scale)
  
  
  c_wide_subset = this_c_wide[which(this_c_wide$user_id %in%  user_ids),]
  avg_profiles = melt(c_wide_subset)
  avg_profiles$avg_tender_breast = avg_profiles$value
  avg_profiles$user_id = factor(as.character(avg_profiles$user_id), levels = user_ids)
  
  g_profiles = ggplot(avg_profiles, aes(x = cycleday_m_D, y = avg_tender_breast, fill = user_id, col = user_id))+
    #geom_point()+
    #geom_line()+
    geom_area(position = "identity", alpha = 0.75)+
    facet_grid(user_id ~ .)+
    guides(col = FALSE, fill = FALSE)+
    theme(strip.text = element_blank())+
    scale_y_continuous(breaks = c(0,0.5,1))+
    scale_x_continuous(breaks = par$x.axis)+
    xlab("cycleday from menstruation")+
    ylab("average breast tenderness")
  g_profiles
  
  ggsave(filename = paste0(IO$panels, "profiles_",bc,".pdf"), 
         plot = g_profiles, 
         width = viz$full_width/4, 
         height = 0.65*viz$full_width/4,
         scale = viz$scale*1.5)
  
}
## Loading objects:
##   mds_df
##   eig_df
##   rtsne_out_df
## Loading objects:
##   this_c_wide
## Loading objects:
##   mds_df
##   eig_df
##   rtsne_out_df
## Loading objects:
##   this_c_wide

Figure 3: PILL TRANSITION

Average Profiles

load(file = paste0(IO$out_Rdata,"pill_transition_profiles.Rdata"), verbose = TRUE)
## Loading objects:
##   transition_profiles
transition_profiles$TB_change_cat_simple = factor(transition_profiles$TB_change_cat_simple  , levels = c("increase","no change","decrease"))

agg = unique(transition_profiles[,c("TB_change_cat_simple","transition","n_users")])
agg$cycle_nb_m_from_trans = -4
agg
##      TB_change_cat_simple transition n_users cycle_nb_m_from_trans
## 1                decrease   off pill     534                    -4
## 206              increase   off pill     989                    -4
## 414             no change   off pill     232                    -4
## 622              decrease    on pill    4544                    -4
## 830              increase    on pill    3165                    -4
## 1038            no change    on pill    2146                    -4
agg_sum = aggregate( n_users ~ transition, agg, FUN = sum )
colnames(agg_sum) = c("transition","n_users_tot")
agg = merge(agg, agg_sum, all = TRUE)
agg$perc_users = paste0(round(100*agg$n_users/agg$n_users_tot),"%")
agg$text = paste0(agg$n_users, "\n",agg$perc_users)

g = ggplot(transition_profiles, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC)) + 
  geom_line()+ facet_grid(transition + TB_change_cat_simple ~ cycle_nb_m_from_trans) +
  scale_x_continuous(breaks = par$x.axis, limits = c(-par$D,par$Df))+ guides(col = FALSE)+
  geom_text(data = agg, aes(x = -par$D, y = 0.25*max(transition_profiles$fraction_cycles_with_TB), col = transition, label = text), 
            nudge_x = 7, nudge_y = 0, size = 3.5)+
  scale_color_manual(values = rep(cols$BC,2))+
  scale_y_continuous(labels = scales::percent)+
  xlab("cycleday from menstruation")+
  ylab("% of cycles with reported breast tenderness")
print(g)

ggsave(filename = paste0(IO$panels, "profiles_pill_transition.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.85*viz$full_width/2,
       scale = viz$scale*1.1)

Fold change at transition

load(file = paste0(IO$out_Rdata,"table_TB_change_cat_x_BC_df.Rdata"), verbose = TRUE)
## Loading objects:
##   table_TB_change_cat_x_BC_df
load(file = paste0(IO$out_Rdata,"hist_log2_TB_ratio_x_BC.Rdata"), verbose = TRUE)
## Loading objects:
##   hist_log2_TB_ratio_x_BC
agg = aggregate(n_users ~ BC, table_TB_change_cat_x_BC_df, sum)
table_TB_change_cat_x_BC_df$total_n_users_per_BC = agg$n_users[match(table_TB_change_cat_x_BC_df$BC, agg$BC)]
table_TB_change_cat_x_BC_df$perc_users = round(100*table_TB_change_cat_x_BC_df$n_users/table_TB_change_cat_x_BC_df$total_n_users_per_BC)
table_TB_change_cat_x_BC_df$perc_users_text = paste0(table_TB_change_cat_x_BC_df$perc_users," %")


g = ggplot(table_TB_change_cat_x_BC_df, aes(x = TB_change_cat, y = n_users, fill = BC))+ # _wrap
  geom_bar(stat = "identity")+
  geom_text(aes(label = perc_users_text, y = 0), #col = BC,
            #position = position_dodge(width = 1),
            col = "white",
            hjust = -0.1)+
  facet_grid(.~BC , scale = "free")+
  xlab("")+ylab("# of users")+
  scale_fill_manual(values = cols$BC)+
  scale_color_manual(values = cols$BC)+
  coord_flip()+
  theme(strip.text = element_blank())
g

ggsave(filename = paste0(IO$panels, "pill_transition_TB_change_cat_x_BC.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.3*viz$full_width/2,
       scale = viz$scale)



g_hist = ggplot(hist_log2_TB_ratio_x_BC, aes(x = log2_TB_ratio, y = n_users, fill = transition))+
  geom_bar(stat = "identity", width = 0.5)+
  facet_grid(transition ~., scale = "free")+
  xlab("log2(BT ratio: after/before)")+ylab("# of users")+
  #geom_vline(xintercept = c(-value_inf+0.5,-1.5,-0.5,0.5,1.5,value_inf-0.5), col = "gray20", linetype = 2, size = 0.2)+
  geom_vline(xintercept = c(-2,-0.5,0.5,2), col = "gray20", linetype = 2, size = 0.2)+
  geom_vline(xintercept = 0, col = "gray20", linetype = 1, size = 0.5)+
  scale_fill_manual(values = cols$BC)+
  scale_x_continuous(breaks = seq(-6,6,by = 1))
g_hist

ggsave(filename = paste0(IO$panels, "pill_transition_hist_log2_TB_ratio_x_BC.pdf"), 
       plot = g_hist, 
       width = viz$full_width/2, 
       height = 0.4*viz$full_width/2,
       scale = viz$scale)

Examples

load(file = paste0(IO$out_Rdata,"days_selected_users_for_pill_transition_examples.Rdata"), verbose = TRUE)
## Loading objects:
##   selected_users_days
selected_users_days$user_id =   selected_users_days$stretch_id

stretch_ids = unique(selected_users_days$stretch_id)
for(stretch_id in stretch_ids){
  cat("\t\t",stretch_id,"\n")
  this_stretch_days = selected_users_days[selected_users_days$stretch_id == stretch_id,]
  trans = unique(this_stretch_days$transition)
  g = ggplot_user_history(this_stretch_days, pill_transition = trans, print_x_lab = FALSE, print_title = FALSE, make_x_axis_symmetrical = TRUE)
  print(g)
  
  ggsave(filename = paste0(IO$panels, "profiles_pill_transition_example_",stretch_id,".pdf"), 
         plot = g, 
         width = viz$full_width/2, 
         height = 0.18*viz$full_width/2,
         scale = viz$scale*1.2)
  
  
}
##       user_1

##       user_2

##       user_3

##       user_4

##       user_5

##       user_6

##       user_7

##       user_8

##       user_9

##       user_10